Read in the data

library (readr)

# Source CSV files hosted on GitHub. Each URL is kept in its own variable and
# the file behind it is read with readr::read_csv() through a url() connection.

# Income per person
urlfile1 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/income_per_person.csv"
income_per_person <- read_csv(url(urlfile1))

# Life expectancy in years
urlfile2 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/life_expectancy_years.csv"
life_expectancy_years <- read_csv(url(urlfile2))

# Total population
urlfile3 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/population_total.csv"
population_total <- read_csv(url(urlfile3))

# Country-level lookup table
urlfile4 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/countries_total.csv"
countries_total <- read_csv(url(urlfile4))

Prepare a single data set based on data set 12

# Reshape the wide income table to long form: every column except geo is
# stacked, with the old column name stored in Year and the cell value in Income.
# na.rm = FALSE keeps the rows whose value is missing.
income_per_person_fin <- gather(
  income_per_person,
  key = "Year",
  value = "Income",
  -geo,
  na.rm = FALSE
)

# Reshape the wide life-expectancy table to long form: every column except geo
# is stacked, with the old column name stored in Year and the cell value in
# "Life Expectancy". na.rm = FALSE keeps the rows whose value is missing.
life_expectancy_years_fin <- gather(
  life_expectancy_years,
  key = "Year",
  value = "Life Expectancy",
  -geo,
  na.rm = FALSE
)

# Reshape the wide population table to long form: every column except geo is
# stacked, with the old column name stored in Year and the cell value in
# Population. na.rm = FALSE keeps the rows whose value is missing.
population_total_fin <- gather(
  population_total,
  key = "Year",
  value = "Population",
  -geo,
  na.rm = FALSE
)

# Join life expectancy and income per person.
# Both long tables carry geo and Year; naming the keys explicitly documents the
# join and avoids relying on dplyr guessing them from the shared column names.
LifeExpIncom <- full_join(
  life_expectancy_years_fin,
  income_per_person_fin,
  by = c("geo", "Year")
)

# Store the year as a 4-character string in a new lower-case `year` column and
# drop the original `Year` column. substr() is given positions 1 to 4 so that
# exactly four characters are kept.
LifeExpIncom <- LifeExpIncom %>%
  mutate(year = substr(Year, 1, 4)) %>%
  select(-Year)

# Attach the country information to the income / life-expectancy table,
# matching LifeExpIncom$geo against countries_total$name.
# Note: despite the object name, keeping unmatched rows from both sides makes
# this a full outer join.
innerjoin <- merge(
  LifeExpIncom,
  countries_total,
  by.x = "geo",
  by.y = "name",
  all.x = TRUE,
  all.y = TRUE
)


# Final data set: LifeExpIncom full-joined with the country-enriched table.
# No `by` is supplied, so dplyr joins on every column the two tables share.
Final_Set <- LifeExpIncom %>%
  full_join(innerjoin)

# A table with both population and region information is needed next, so the
# population table's Year column is renamed to year to match the other table.
population_total_fin <- population_total_fin %>%
  rename(year = Year)

# Attach the country information to the population table, matching
# population_total_fin$geo against countries_total$name and keeping unmatched
# rows from both sides (a full outer join, despite the object name).
innerjoin2 <- merge(
  population_total_fin,
  countries_total,
  by.x = "geo",
  by.y = "name",
  all.x = TRUE,
  all.y = TRUE
)

# Combine the income / life-expectancy table with the population table.
# by.x, by.y and all are merge() arguments, not full_join() arguments, so they
# are replaced by an explicit `by`: every column name the two tables have in
# common. Joining on all shared columns keeps a single copy of each of them
# instead of producing .x / .y duplicates.
Fin <- full_join(
  x = Final_Set,
  y = innerjoin2,
  by = intersect(names(Final_Set), names(innerjoin2))
)

# Keep only the rows for 2015. `year` is stored as a character column, hence
# the comparison against the string "2015"; subset() drops rows where the
# comparison is NA.
Sub_2015 <- subset(Final_Set, year == "2015")

Sub_2015.population <- subset(population_total_fin, year == "2015")

# Give the life-expectancy column a syntactic name (no embedded space) in both
# the 2015 subset and the full table.
Sub_2015 <- Sub_2015 %>%
  rename(Life.Expectancy = `Life Expectancy`)

Final_Set <- Final_Set %>%
  rename(Life.Expectancy = `Life Expectancy`)

# Add the 2015 population to the 2015 subset. Only rows whose geo appears in
# both tables are kept (merge()'s default). geo is the only key, so the `year`
# column present in both inputs is carried through twice with merge()'s
# suffixes.
Final_Sub_2015 <- merge(Sub_2015, Sub_2015.population, by = "geo")

Here we create a scatter plot for data from 2015

# Bubble chart of income against life expectancy for 2015.
# Every mapping is a formula evaluated against the piped data frame, so the
# plot always reflects the data that is actually passed in rather than reaching
# back to the global Final_Sub_2015 object.
fig <- Final_Sub_2015 %>%
  plot_ly(
    type = "scatter",
    mode = "markers",
    x = ~Life.Expectancy,
    y = ~Income,
    # Marker size follows the square root of the population, rescaled to the
    # range given in `sizes`.
    size = ~sqrt(Population),
    sizes = c(5, 20),
    color = ~geo,
    alpha = 0.5,
    # Hover text shown for each marker.
    text = ~paste('Population:', Population, '<br>Country:', geo),
    marker = list(
      symbol = "circle",
      sizemode = "diameter",
      line = list(width = 2)
    )
  )
 
# Add the title and axis labels. The result is assigned back to `fig`;
# otherwise the labelled plot is only printed once and the `fig` printed
# afterwards is the unlabelled original.
fig <- fig %>%
  layout(
    title = list(text = "Life Expectancy vs. Income"),
    xaxis = list(title = list(text = "Life Expectancy")),
    yaxis = list(title = list(text = "Income in USD"))
  )

fig

Here we create an animated plot

# Add a numeric copy of the year column; the widened table is kept as Finy.
Finy <- Fin %>%
  mutate(Num_year = as.numeric(year))

# Reduce Fin to the five variables of interest and drop every row that has a
# missing value in any of them.
Fin <- na.omit(
  Finy[, c("Life Expectancy", "Income", "region", "Population", "Num_year")]
)

# Animated bubble chart: income (log10 scale) against life expectancy, sized by
# population and coloured by region, with one animation state per Num_year.
# Columns are referenced by name inside aes() and transition_time() so they are
# looked up in the plot data rather than pulled from the global Fin object.
p <- ggplot(
  data = Fin,
  aes(
    x = `Life Expectancy`,
    y = Income,
    size = Population,
    colour = region
  )
) +
  # The layer inherits the plot-level mappings; `ids` is not a ggplot2
  # aesthetic and was dropped.
  geom_point(show.legend = TRUE, alpha = 0.4) +
  scale_size(range = c(2, 12)) +
  scale_y_log10() +
  labs(
    x = "Life Expectancy",
    y = "Income in USD",
    title = "                              Life Expectancy vs. Income"
  ) +
  ## gganimate command
  transition_time(Num_year)

# Render the animation as a GIF.
animate(p, renderer = gifski_renderer())

Read in the data

# Location of the gas-station CSV. This reuses (overwrites) the urlfile1
# variable.
urlfile1 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/POC.csv"

# Read the file through a url() connection and preview the first rows.
gas_stations <- read_csv(url(urlfile1))
head(gas_stations)
## # A tibble: 6 x 32
##      X1 site_row_id STATE county  ADDRESS  CITY  ycoord xcoord SITE_DESCRIPTION 
##   <dbl> <chr>       <chr> <chr>   <chr>    <chr>  <dbl>  <dbl> <chr>            
## 1     1 1-3R8J-494  CA    Los An… 37120 4… PALM…   34.6  -118. Los Angeles-Long…
## 2     2 1-3R8J-362  WA    Frankl… 1212 N … PASCO   46.2  -119. Kennewick-Pasco-…
## 3     3 1-3R8J-199  NV    Washoe  99 DAMO… RENO    39.4  -120. Reno-Sparks NV   
## 4     4 1-3R8J-261  UT    Salt L… 5404 S … SALT…   40.7  -112. Salt Lake City UT
## 5     5 1-3R8J-493  CA    Los An… 1731 E … LANC…   34.7  -118. Los Angeles-Long…
## 6     6 1-3R8J-508  WA    Benton  2707 S … KENN…   46.2  -119. Kennewick-Pasco-…
## # … with 23 more variables: service_or_fuel <chr>, diesel <chr>,
## #   twentyfour_hour_flag <chr>, car_wash <chr>, truckstop_flag <chr>,
## #   description <chr>, PUMP_TECH <chr>, POC <dbl>, HIFCA <dbl>, ZIPnew <dbl>,
## #   POCAGE <dbl>, POCGAP <dbl>, ZIPPOC <dbl>, HFG <dbl>, MSA <dbl>,
## #   dist.to.poc <dbl>, cate.poc.density <chr>, cate.poc.age <chr>,
## #   cate.poc.age.20 <chr>, cate.poc.intensity <chr>,
## #   cate.poc.intensity.tot <chr>, MSA_POC <dbl>, MSA_POC.1 <dbl>

Create a random sample of 500 gas stations and provide the requested information for each

library(leaflet)

# Draw 500 rows at random from the gas-station table. No seed is set here, so
# the sample differs from run to run.
gas_sub <- gas_stations %>%
  sample_n(500)

# Inspect the structure of the sample.
str(gas_sub)
## spec_tbl_df [500 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ X1                    : num [1:500] 53030 4730 57608 1890 32306 ...
##  $ site_row_id           : chr [1:500] "1-1APKRO5" "1-40CZ-4579" "1-1FE3H5W" "1-3UVO-3853" ...
##  $ STATE                 : chr [1:500] "GA" "CO" "IN" "OH" ...
##  $ county                : chr [1:500] "Henry" "Douglas" "Putnam" "Franklin" ...
##  $ ADDRESS               : chr [1:500] "6105 HWY 155 N" "830 WILCOX ST" "1207 S BLOOMINGTON ST" "215 W BRIDGE ST" ...
##  $ CITY                  : chr [1:500] "STOCKBRIDGE" "CASTLE ROCK" "GREENCASTLE" "DUBLIN" ...
##  $ ycoord                : num [1:500] 33.6 39.4 39.6 40.1 32.8 ...
##  $ xcoord                : num [1:500] -84.2 -104.9 -86.9 -83.1 -97.3 ...
##  $ SITE_DESCRIPTION      : chr [1:500] "Atlanta-Sandy Springs-Marietta GA" "Denver-Aurora-Broomfield CO" "RURAL" "Columbus OH" ...
##  $ service_or_fuel       : chr [1:500] "Fuel" "Fuel" "Fuel" "Fuel" ...
##  $ diesel                : chr [1:500] "N" "Y" "Y" "N" ...
##  $ twentyfour_hour_flag  : chr [1:500] "Y" "N" "Y" "N" ...
##  $ car_wash              : chr [1:500] "N" "N" "N" "N" ...
##  $ truckstop_flag        : chr [1:500] "Y" "N" "N" "N" ...
##  $ description           : chr [1:500] "URBAN" "URBAN" "RURAL" "URBAN" ...
##  $ PUMP_TECH             : chr [1:500] "C" "O" "O" "O" ...
##  $ POC                   : num [1:500] 0 0 0 0 0 0 0 0 0 0 ...
##  $ HIFCA                 : num [1:500] 0 1 0 1 1 1 0 0 0 0 ...
##  $ ZIPnew                : num [1:500] 30281 80104 46135 43017 76137 ...
##  $ POCAGE                : num [1:500] NA NA NA NA NA NA NA NA NA NA ...
##  $ POCGAP                : num [1:500] NA NA NA NA NA NA NA NA NA NA ...
##  $ ZIPPOC                : num [1:500] 2 0 0 0 1 0 0 0 0 0 ...
##  $ HFG                   : num [1:500] 0 0 0 0 0 0 0 0 0 0 ...
##  $ MSA                   : num [1:500] 520 2080 0 1840 2800 4760 0 6160 5720 2440 ...
##  $ dist.to.poc           : num [1:500] 5.361 34.094 39.171 9.112 0.233 ...
##  $ cate.poc.density      : chr [1:500] "(5,Inf]" "(-1e-06,1]" "(-1e-06,1]" "(1,5]" ...
##  $ cate.poc.age          : chr [1:500] "(15,140]" "(140,Inf]" "(140,Inf]" "(0,15]" ...
##  $ cate.poc.age.20       : chr [1:500] "(15,140]" "(140,Inf]" "(140,Inf]" "(0,15]" ...
##  $ cate.poc.intensity    : chr [1:500] "(5,Inf]" "(-0.0001,0]" "(-0.0001,0]" "(0,5]" ...
##  $ cate.poc.intensity.tot: chr [1:500] "(8,Inf]" "(-0.0001,0]" "(-0.0001,0]" "(0,8]" ...
##  $ MSA_POC               : num [1:500] 1 0 0 0 1 0 0 0 0 0 ...
##  $ MSA_POC.1             : num [1:500] 1 0 0 0 1 0 0 0 0 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   X1 = col_double(),
##   ..   site_row_id = col_character(),
##   ..   STATE = col_character(),
##   ..   county = col_character(),
##   ..   ADDRESS = col_character(),
##   ..   CITY = col_character(),
##   ..   ycoord = col_double(),
##   ..   xcoord = col_double(),
##   ..   SITE_DESCRIPTION = col_character(),
##   ..   service_or_fuel = col_character(),
##   ..   diesel = col_character(),
##   ..   twentyfour_hour_flag = col_character(),
##   ..   car_wash = col_character(),
##   ..   truckstop_flag = col_character(),
##   ..   description = col_character(),
##   ..   PUMP_TECH = col_character(),
##   ..   POC = col_double(),
##   ..   HIFCA = col_double(),
##   ..   ZIPnew = col_double(),
##   ..   POCAGE = col_double(),
##   ..   POCGAP = col_double(),
##   ..   ZIPPOC = col_double(),
##   ..   HFG = col_double(),
##   ..   MSA = col_double(),
##   ..   dist.to.poc = col_double(),
##   ..   cate.poc.density = col_character(),
##   ..   cate.poc.age = col_character(),
##   ..   cate.poc.age.20 = col_character(),
##   ..   cate.poc.intensity = col_character(),
##   ..   cate.poc.intensity.tot = col_character(),
##   ..   MSA_POC = col_double(),
##   ..   MSA_POC.1 = col_double()
##   .. )
# One label string per sampled station: state, zip code and county, with the
# pieces separated by single spaces (paste()'s default separator).
label.msg <- paste(
  "State:", gas_sub$STATE,
  "Zip:", gas_sub$ZIPnew,
  "\n County:", gas_sub$county,
  "\n"
)

# Custom 60 x 60 marker icon loaded from an image hosted on GitHub.
redicon <- makeIcon(
  iconUrl = "https://raw.githubusercontent.com/jsegich/STA553/main/data/0f61ba72e0e12ba59d30a50295964871.png?raw=true",
  iconWidth = 60,
  iconHeight = 60
)
# Define a leaflet map of the sampled gas stations.
# The initial view is centred on the mean coordinates; na.rm = TRUE keeps the
# centre defined even if a station has a missing coordinate. Marker positions
# are given as named lng/lat formulas evaluated against the data passed to
# leaflet().
map12 <- leaflet(gas_sub) %>%
  addTiles() %>%
  setView(
    lng = mean(gas_sub$xcoord, na.rm = TRUE),
    lat = mean(gas_sub$ycoord, na.rm = TRUE),
    zoom = 4
  ) %>%
  addMarkers(
    lng = ~xcoord,
    lat = ~ycoord,
    label = ~label.msg,
    icon = redicon
  )

map12